#coding=utf-8

import urllib2
import urlparse

from bs4 import BeautifulSoup
class BIQUGE:
    """Scraper for book pages laid out like the default www.biquge.la URLs.

    ``getOneBook`` reads a book's index page and ``getOnePage`` reads a single
    chapter page. Both download the page with ``urllib2`` and parse it with
    BeautifulSoup.
    """

    def __init__(self):
        """Create a scraper; no state is kept on the instance."""

    def _fetch_soup(self, url):
        """Download ``url`` and return its body parsed by BeautifulSoup."""
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
        return BeautifulSoup(html)

    def getOneBook(self, url='http://www.biquge.la/book/392/'):
        """Read a book's index page and list its chapters.

        Args:
            url: Address of the index page. The book id is taken from the
                second-to-last '/'-separated piece of this string, so the
                address is expected to end with '/' (as the default does).

        Returns:
            A tuple ``([bookid, bookname, author], chapters)``. ``bookname``
            is the text of the ``<h1>`` and ``author`` the text of the first
            ``<p>`` inside the ``<div id="info">`` element. ``chapters`` is a
            list of ``[title, chapter_url]`` pairs, one per ``<dd>`` element
            that contains a link, in document order.

        Raises:
            AttributeError: If the page has no ``<div id="info">`` element,
                or that element has no ``<h1>`` or ``<p>``.
        """
        soup = self._fetch_soup(url)

        info = soup.find('div', attrs={'id': 'info'})
        bookname = info.find('h1').getText()
        author = info.find('p').getText()

        bookid = url.split('/')[-2]

        text_url = []
        for dd in soup.findAll('dd'):
            a = dd.find('a')
            href = a.get('href') if a is not None else None
            if not href:
                # No link, or a link without a target: not a chapter entry.
                continue
            # urljoin resolves relative ("296165.html"), root-relative
            # ("/book/392/296165.html") and absolute hrefs alike, whereas
            # plain concatenation is only right for the first form.
            text_url.append([a.getText(), urlparse.urljoin(url, href)])
        return [bookid, bookname, author], text_url

    def getOnePage(self, url='http://www.biquge.la/book/392/296165.html'):
        """Read one chapter page and return its content element.

        Args:
            url: Address of the chapter page.

        Returns:
            The ``<div id="content">`` element of the page as a BeautifulSoup
            tag, or ``None`` when the page has no such element.
        """
        soup = self._fetch_soup(url)
        return soup.find('div', attrs={'id': 'content'})

if __name__ == '__main__':
    import os

    # Download every chapter of one book into a directory next to this
    # script, writing one numbered HTML file per chapter.
    book = BIQUGE()

    # getOneBook returns ([bookid, bookname, author], chapters). Unpack both
    # parts so the loop below iterates over the chapter list only; iterating
    # the returned tuple itself fails on the three-item info list.
    book_info, text_url = book.getOneBook('http://www.biquge.la/book/1/')

    # abspath keeps the target directory well-defined even when __file__ is
    # a bare file name.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    # Name the output directory after the book id taken from the URL.
    booknum = book_info[0]
    path = os.path.join(BASE_DIR, str(booknum))

    if not os.path.isdir(path):
        os.makedirs(path)

    # Files are numbered from 1 in the order the chapters are listed.
    for k, (text, url) in enumerate(text_url, 1):
        print(text)
        content = book.getOnePage(url)

        with open(os.path.join(path, '%d.html' % k), 'w') as f:
            f.write(str(content))


        
